Set Up

rm(list = ls())
library(tidyverse)
library(here)
library(phyloseq)
library(vegan)
set.seed(2024)
theme_set(theme_bw())
max.core <- parallel::detectCores()

ps.rare <- readRDS(here('data','following_study','ps_rarefied.rds')) 
sample_data(ps.rare)$Shannon <- estimate_richness(ps.rare)$Shannon
# transform data into proportion
ps.rare.prop <- ps.rare %>% transform_sample_counts(function(x) x/sum(x))

sam <- data.frame(sample_data(ps.rare))

Functions

plot_ord draws ordination plot for different factors using plot_ordination function in phyloseq package.

permanova performs permutational multivariate analysis of variance (PERMANOVA) based on adonis2 function in vegan package.

permdisp performs permutational analysis of multivariate dispersions (PERMDISP) based on betadisper function in vegan package.

plot_ord <- function(data, factor, method, distance){
    data.ord <- ordinate(data, method = method, distance = distance)
    p <- plot_ordination(data, data.ord, color = factor)
    p <- p + stat_ellipse(type = "t",geom = "polygon",alpha = 0)
    p <- p + ggtitle(str_c(factor,method,distance, sep = ' - '))
    print(p)
}
permanova <- function(data, formula, method, permutations=1e4, strata = NULL, core = max.core){
    message('PERMANOVA Model: ', method, '~', formula, '; Strata: ', ifelse(is_null(strata), 'None', as.character(strata)))
    dist.matrix <- phyloseq::distance(data, method=method)
    df <- data.frame(sample_data(data))
    model <- as.formula(paste0('dist.matrix~', formula))
    if (!is_null(strata)) {strata <- df[,strata]}
    result <- adonis2(model,
                      data = df,
                      permutations=permutations,
                      strata = strata,
                      parallel = core,
                      by = 'term',
                      na.action = na.omit)
    return(result)
}
permdisp <- function(data, group, method, permutations=1e4, pairwise = FALSE, core = max.core){
    message('PERMDISP Model: ', method, '~', group)
    dist.matrix <- phyloseq::distance(data, method=method)
    df <- data.frame(sample_data(data))
    beta.disp <- betadisper(dist.matrix, group = df[,group])
    result <- permutest(beta.disp, permutations = permutations, pairwise = pairwise, type = 'centroid')
    return(result)
}

Start

In this section, we want to estimate the effect of different factors on the microbial diversity. The factors we are focusing on are Household, Epileptic.or.Control, Breed.Group..1., Pheno.Y.N, Sex, and Age..months.. We compare the species richness (Shannon index) among different factors using ANOVA, and compare the centroid of dissimilarity of microbial community between different groups using PERMANOVA using the Bray-Curtis and weighted Unifrac distance, and visualized using multi-dimensional scaling. PERMDISP was used to test the homogeneity of multivariate dispersions among groups.

Household Effect

Alpha Diversity

ggplot(sam,aes(x = as.numeric(Household), y = Shannon, group = Household)) +
    geom_point() + geom_line() + xlab('Household')

anova(lm(Shannon~Household, data = sam))
## Analysis of Variance Table
## 
## Response: Shannon
##           Df Sum Sq Mean Sq F value  Pr(>F)  
## Household 48 8.8524 0.18443  1.5768 0.05785 .
## Residuals 49 5.7309 0.11696                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Here we see the Shannon diversity index is significantly different among households.

Beta Diversity

Bray-Curtis distance

permanova(ps.rare.prop, 'Household', 'bray')
## PERMANOVA Model: bray~Household; Strata: None
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 10000
## 
## adonis2(formula = model, data = df, permutations = permutations, by = "term", parallel = core, na.action = na.omit, strata = strata)
##           Df SumOfSqs      R2      F    Pr(>F)    
## Household 48  12.2360 0.68899 2.2615 9.999e-05 ***
## Residual  49   5.5233 0.31101                     
## Total     97  17.7594 1.00000                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Weighted-Unifrac distance

permanova(ps.rare.prop, 'Household', 'wunifrac')
## PERMANOVA Model: wunifrac~Household; Strata: None
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 10000
## 
## adonis2(formula = model, data = df, permutations = permutations, by = "term", parallel = core, na.action = na.omit, strata = strata)
##           Df SumOfSqs      R2      F    Pr(>F)    
## Household 48  1.90478 0.67668 2.1365 9.999e-05 ***
## Residual  49  0.91012 0.32332                     
## Total     97  2.81490 1.00000                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Epileptic Effect

Alpha Diversity

ggplot(sam, aes(x = Epileptic.or.Control, y = Shannon)) + 
    geom_boxplot() + geom_jitter(height = 0, width = 0.25)

anova(lm(Shannon~Epileptic.or.Control, data = sam))
## Analysis of Variance Table
## 
## Response: Shannon
##                      Df  Sum Sq  Mean Sq F value Pr(>F)
## Epileptic.or.Control  1  0.0069 0.006919  0.0456 0.8314
## Residuals            96 14.5764 0.151838

Beta Diversity

Bray-Curtis distance

plot_ord(ps.rare.prop, 'Epileptic.or.Control','MDS','bray')

plot_ord(ps.rare.prop, 'Epileptic.or.Control','NMDS','bray')
## Run 0 stress 0.2090031 
## Run 1 stress 0.2120165 
## Run 2 stress 0.2221277 
## Run 3 stress 0.2190454 
## Run 4 stress 0.2104827 
## Run 5 stress 0.2342869 
## Run 6 stress 0.2306033 
## Run 7 stress 0.2052351 
## ... New best solution
## ... Procrustes: rmse 0.02742164  max resid 0.2502951 
## Run 8 stress 0.2222867 
## Run 9 stress 0.2047395 
## ... New best solution
## ... Procrustes: rmse 0.03872348  max resid 0.2940312 
## Run 10 stress 0.2225866 
## Run 11 stress 0.2219482 
## Run 12 stress 0.2116877 
## Run 13 stress 0.2104571 
## Run 14 stress 0.2100755 
## Run 15 stress 0.2049004 
## ... Procrustes: rmse 0.04019767  max resid 0.3040893 
## Run 16 stress 0.2092277 
## Run 17 stress 0.2197072 
## Run 18 stress 0.2054718 
## Run 19 stress 0.2047057 
## ... New best solution
## ... Procrustes: rmse 0.0358456  max resid 0.3316494 
## Run 20 stress 0.2052053 
## ... Procrustes: rmse 0.04920655  max resid 0.4172468 
## *** Best solution was not repeated -- monoMDS stopping criteria:
##      3: no. of iterations >= maxit
##     17: stress ratio > sratmax

permanova(ps.rare.prop, 'Epileptic.or.Control', 'bray', strata = 'Household')
## PERMANOVA Model: bray~Epileptic.or.Control; Strata: Household
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Blocks:  strata 
## Permutation: free
## Number of permutations: 10000
## 
## adonis2(formula = model, data = df, permutations = permutations, by = "term", parallel = core, na.action = na.omit, strata = strata)
##                      Df SumOfSqs      R2      F  Pr(>F)  
## Epileptic.or.Control  1   0.1578 0.00889 0.8608 0.09429 .
## Residual             96  17.6015 0.99111                 
## Total                97  17.7594 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
permdisp(ps.rare.prop, 'Epileptic.or.Control', 'bray')
## PERMDISP Model: bray~Epileptic.or.Control
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 10000
## 
## Response: Distances
##           Df  Sum Sq   Mean Sq      F N.Perm Pr(>F)
## Groups     1 0.00141 0.0014051 0.0939  10000  0.765
## Residuals 96 1.43592 0.0149575

Weighted-Unifrac distance

plot_ord(ps.rare.prop, 'Epileptic.or.Control','MDS','wunifrac')

plot_ord(ps.rare.prop, 'Epileptic.or.Control','NMDS','wunifrac')
## Run 0 stress 0.1675024 
## Run 1 stress 0.1722831 
## Run 2 stress 0.1662048 
## ... New best solution
## ... Procrustes: rmse 0.04420138  max resid 0.1503383 
## Run 3 stress 0.1681236 
## Run 4 stress 0.1659424 
## ... New best solution
## ... Procrustes: rmse 0.0621379  max resid 0.3563645 
## Run 5 stress 0.170698 
## Run 6 stress 0.1665096 
## Run 7 stress 0.1701394 
## Run 8 stress 0.1641125 
## ... New best solution
## ... Procrustes: rmse 0.01648458  max resid 0.1126768 
## Run 9 stress 0.176551 
## Run 10 stress 0.1691928 
## Run 11 stress 0.1739056 
## Run 12 stress 0.1714101 
## Run 13 stress 0.1741002 
## Run 14 stress 0.1675946 
## Run 15 stress 0.1694778 
## Run 16 stress 0.1676259 
## Run 17 stress 0.1713354 
## Run 18 stress 0.1839347 
## Run 19 stress 0.1749329 
## Run 20 stress 0.1686205 
## *** Best solution was not repeated -- monoMDS stopping criteria:
##     20: stress ratio > sratmax

permanova(ps.rare.prop, 'Epileptic.or.Control', 'wunifrac', strata = 'Household')
## PERMANOVA Model: wunifrac~Epileptic.or.Control; Strata: Household
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Blocks:  strata 
## Permutation: free
## Number of permutations: 10000
## 
## adonis2(formula = model, data = df, permutations = permutations, by = "term", parallel = core, na.action = na.omit, strata = strata)
##                      Df SumOfSqs      R2      F Pr(>F)  
## Epileptic.or.Control  1  0.03991 0.01418 1.3808 0.0442 *
## Residual             96  2.77499 0.98582                
## Total                97  2.81490 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
permdisp(ps.rare.prop, 'Epileptic.or.Control', 'wunifrac')
## PERMDISP Model: wunifrac~Epileptic.or.Control
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 10000
## 
## Response: Distances
##           Df  Sum Sq   Mean Sq     F N.Perm Pr(>F)
## Groups     1 0.00666 0.0066565 1.769  10000 0.1855
## Residuals 96 0.36123 0.0037628

Breed Effect

Alpha Diversity

sam.breed <- sam %>% filter(is.na(Breed.Group..1.) == FALSE)
ggplot(sam.breed) +
    geom_point(aes(x = Breed.Group..1., y = Shannon, colour = Breed.Group..1.)) +
    facet_wrap(~Epileptic.or.Control) + 
    theme(axis.text.x = element_blank(), axis.ticks.x.bottom = element_blank())

anova(lm(Shannon~Household + Breed.Group..1., data = sam.breed))
## Analysis of Variance Table
## 
## Response: Shannon
##                 Df Sum Sq Mean Sq F value  Pr(>F)  
## Household       45 7.3436 0.16319  1.5502 0.08251 .
## Breed.Group..1.  4 1.2154 0.30384  2.8863 0.03473 *
## Residuals       39 4.1056 0.10527                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Beta Diversity

Bray-Curtis distance

plot_ord(ps.rare.prop, 'Breed.Group..1.','MDS','bray')
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse

plot_ord(ps.rare.prop, 'Breed.Group..1.','NMDS','bray')
## Run 0 stress 0.2090031 
## Run 1 stress 0.2221095 
## Run 2 stress 0.2218702 
## Run 3 stress 0.2389149 
## Run 4 stress 0.2036268 
## ... New best solution
## ... Procrustes: rmse 0.04225573  max resid 0.2746299 
## Run 5 stress 0.2106465 
## Run 6 stress 0.2062254 
## Run 7 stress 0.2224712 
## Run 8 stress 0.2324198 
## Run 9 stress 0.2051928 
## Run 10 stress 0.2058761 
## Run 11 stress 0.2034668 
## ... New best solution
## ... Procrustes: rmse 0.01469357  max resid 0.09146216 
## Run 12 stress 0.2201584 
## Run 13 stress 0.2057869 
## Run 14 stress 0.2057415 
## Run 15 stress 0.227621 
## Run 16 stress 0.2314833 
## Run 17 stress 0.2103726 
## Run 18 stress 0.2062695 
## Run 19 stress 0.2119056 
## Run 20 stress 0.2036101 
## ... Procrustes: rmse 0.005697988  max resid 0.03027788 
## *** Best solution was not repeated -- monoMDS stopping criteria:
##      7: no. of iterations >= maxit
##     13: stress ratio > sratmax
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse

permanova(ps.rare.prop, 'Household + Breed.Group..1.', 'bray')
## PERMANOVA Model: bray~Household + Breed.Group..1.; Strata: None
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 10000
## 
## adonis2(formula = model, data = df, permutations = permutations, by = "term", parallel = core, na.action = na.omit, strata = strata)
##                 Df SumOfSqs      R2      F    Pr(>F)    
## Household       45  11.0182 0.69597 2.3051 9.999e-05 ***
## Breed.Group..1.  4   0.6705 0.04235 1.5781    0.0164 *  
## Residual        39   4.1427 0.26167                     
## Total           88  15.8315 1.00000                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
permdisp(ps.rare.prop, 'Breed.Group..1.', 'bray')
## PERMDISP Model: bray~Breed.Group..1.
## missing observations due to 'group' removed
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 10000
## 
## Response: Distances
##           Df  Sum Sq  Mean Sq      F N.Perm Pr(>F)
## Groups     6 0.08509 0.014182 0.8503  10000 0.5275
## Residuals 82 1.36770 0.016679

Weighted-Unifrac distance

plot_ord(ps.rare.prop, 'Breed.Group..1.','MDS','wunifrac')
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse

plot_ord(ps.rare.prop, 'Breed.Group..1.','NMDS','wunifrac')
## Run 0 stress 0.1675024 
## Run 1 stress 0.1681173 
## Run 2 stress 0.1700059 
## Run 3 stress 0.1671379 
## ... New best solution
## ... Procrustes: rmse 0.06283511  max resid 0.1975331 
## Run 4 stress 0.1692047 
## Run 5 stress 0.1708241 
## Run 6 stress 0.1677683 
## Run 7 stress 0.1745307 
## Run 8 stress 0.1702544 
## Run 9 stress 0.1658768 
## ... New best solution
## ... Procrustes: rmse 0.0633012  max resid 0.2456404 
## Run 10 stress 0.1715445 
## Run 11 stress 0.1681171 
## Run 12 stress 0.1774195 
## Run 13 stress 0.1699304 
## Run 14 stress 0.1687288 
## Run 15 stress 0.1715728 
## Run 16 stress 0.1778285 
## Run 17 stress 0.1735431 
## Run 18 stress 0.1660763 
## ... Procrustes: rmse 0.06638404  max resid 0.2432446 
## Run 19 stress 0.1668454 
## Run 20 stress 0.169956 
## *** Best solution was not repeated -- monoMDS stopping criteria:
##      1: no. of iterations >= maxit
##     19: stress ratio > sratmax
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse

permanova(ps.rare.prop, 'Household + Breed.Group..1.', 'wunifrac')
## PERMANOVA Model: wunifrac~Household + Breed.Group..1.; Strata: None
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 10000
## 
## adonis2(formula = model, data = df, permutations = permutations, by = "term", parallel = core, na.action = na.omit, strata = strata)
##                 Df SumOfSqs      R2      F    Pr(>F)    
## Household       45  1.72349 0.68078 2.2577 9.999e-05 ***
## Breed.Group..1.  4  0.14657 0.05790 2.1601    0.0043 ** 
## Residual        39  0.66159 0.26133                     
## Total           88  2.53165 1.00000                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
permdisp(ps.rare.prop, 'Breed.Group..1.', 'wunifrac')
## PERMDISP Model: wunifrac~Breed.Group..1.
## missing observations due to 'group' removed
## Warning in betadisper(dist.matrix, group = df[, group]): some squared distances
## are negative and changed to zero
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 10000
## 
## Response: Distances
##           Df  Sum Sq   Mean Sq      F N.Perm Pr(>F)
## Groups     6 0.00967 0.0016125 0.3415  10000 0.9156
## Residuals 82 0.38717 0.0047216

Drug Effect

Alpha Diversity

sam.drug <- sam %>% filter(Epileptic.or.Control == 'Epileptic')
ggplot(sam.drug, aes(x = Pheno.Y.N, y = Shannon)) + 
    geom_boxplot() + geom_jitter(height = 0, width = 0.25)

anova(lm(Shannon~Pheno.Y.N, data = sam.drug))
## Analysis of Variance Table
## 
## Response: Shannon
##           Df Sum Sq Mean Sq F value Pr(>F)
## Pheno.Y.N  1 0.1613 0.16130  0.9382 0.3377
## Residuals 47 8.0806 0.17193

Beta Diversity

Bray-Curtis distance

ps.drug <- ps.rare.prop %>% subset_samples(Epileptic.or.Control == 'Epileptic')
plot_ord(ps.drug, 'Pheno.Y.N','MDS','bray')

plot_ord(ps.drug, 'Pheno.Y.N','NMDS','bray')
## Run 0 stress 0.2031407 
## Run 1 stress 0.2072023 
## Run 2 stress 0.1997469 
## ... New best solution
## ... Procrustes: rmse 0.04751999  max resid 0.1551351 
## Run 3 stress 0.200341 
## Run 4 stress 0.2169714 
## Run 5 stress 0.2100229 
## Run 6 stress 0.2198162 
## Run 7 stress 0.2084752 
## Run 8 stress 0.1996991 
## ... New best solution
## ... Procrustes: rmse 0.004414889  max resid 0.02030119 
## Run 9 stress 0.2006571 
## Run 10 stress 0.2205248 
## Run 11 stress 0.2165556 
## Run 12 stress 0.2230353 
## Run 13 stress 0.1996998 
## ... Procrustes: rmse 0.0004139606  max resid 0.00219551 
## ... Similar to previous best
## Run 14 stress 0.2142185 
## Run 15 stress 0.2076748 
## Run 16 stress 0.2348144 
## Run 17 stress 0.1996993 
## ... Procrustes: rmse 0.0001837778  max resid 0.0008905393 
## ... Similar to previous best
## Run 18 stress 0.2087838 
## Run 19 stress 0.2059404 
## Run 20 stress 0.206366 
## *** Best solution repeated 2 times

permanova(ps.drug, 'Pheno.Y.N', 'bray')
## PERMANOVA Model: bray~Pheno.Y.N; Strata: None
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 10000
## 
## adonis2(formula = model, data = df, permutations = permutations, by = "term", parallel = core, na.action = na.omit, strata = strata)
##           Df SumOfSqs      R2      F Pr(>F)
## Pheno.Y.N  1   0.2210 0.02581 1.2452 0.1839
## Residual  47   8.3424 0.97419              
## Total     48   8.5634 1.00000
permdisp(ps.drug, 'Pheno.Y.N', 'bray')
## PERMDISP Model: bray~Pheno.Y.N
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 10000
## 
## Response: Distances
##           Df  Sum Sq  Mean Sq      F N.Perm Pr(>F)
## Groups     1 0.01975 0.019745 1.5262  10000 0.2332
## Residuals 47 0.60807 0.012938

Weighted-Unifrac distance

plot_ord(ps.drug, 'Pheno.Y.N','MDS','wunifrac')

plot_ord(ps.drug, 'Pheno.Y.N','NMDS','wunifrac')
## Run 0 stress 0.1773379 
## Run 1 stress 0.1808101 
## Run 2 stress 0.1828621 
## Run 3 stress 0.1886672 
## Run 4 stress 0.190502 
## Run 5 stress 0.1794994 
## Run 6 stress 0.1903665 
## Run 7 stress 0.1763031 
## ... New best solution
## ... Procrustes: rmse 0.04950436  max resid 0.1835024 
## Run 8 stress 0.1920713 
## Run 9 stress 0.1774698 
## Run 10 stress 0.1792793 
## Run 11 stress 0.1829205 
## Run 12 stress 0.1801618 
## Run 13 stress 0.1843516 
## Run 14 stress 0.1968082 
## Run 15 stress 0.193958 
## Run 16 stress 0.1797889 
## Run 17 stress 0.1820692 
## Run 18 stress 0.1856406 
## Run 19 stress 0.2036954 
## Run 20 stress 0.1920608 
## *** Best solution was not repeated -- monoMDS stopping criteria:
##     20: stress ratio > sratmax

permanova(ps.drug, 'Pheno.Y.N', 'wunifrac')
## PERMANOVA Model: wunifrac~Pheno.Y.N; Strata: None
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 10000
## 
## adonis2(formula = model, data = df, permutations = permutations, by = "term", parallel = core, na.action = na.omit, strata = strata)
##           Df SumOfSqs      R2      F Pr(>F)
## Pheno.Y.N  1  0.02724 0.02206 1.0603 0.3678
## Residual  47  1.20756 0.97794              
## Total     48  1.23480 1.00000
permdisp(ps.drug, 'Pheno.Y.N', 'wunifrac')
## PERMDISP Model: wunifrac~Pheno.Y.N
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 10000
## 
## Response: Distances
##           Df   Sum Sq   Mean Sq      F N.Perm  Pr(>F)  
## Groups     1 0.010846 0.0108459 3.5401  10000 0.06469 .
## Residuals 47 0.143995 0.0030637                        
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Sex Effect

prop.test(xtabs(~Household+Sex, data = sam))
## Warning in prop.test(xtabs(~Household + Sex, data = sam)): Chi-squared
## approximation may be incorrect
## 
##  49-sample test for equality of proportions without continuity
##  correction
## 
## data:  xtabs(~Household + Sex, data = sam)
## X-squared = 52.464, df = 48, p-value = 0.3051
## alternative hypothesis: two.sided
## sample estimates:
##  prop 1  prop 2  prop 3  prop 4  prop 5  prop 6  prop 7  prop 8  prop 9 prop 10 
##     0.5     0.5     0.0     1.0     0.0     1.0     1.0     0.5     1.0     1.0 
## prop 11 prop 12 prop 13 prop 14 prop 15 prop 16 prop 17 prop 18 prop 19 prop 20 
##     0.5     1.0     1.0     1.0     0.5     0.5     1.0     1.0     0.0     0.5 
## prop 21 prop 22 prop 23 prop 24 prop 25 prop 26 prop 27 prop 28 prop 29 prop 30 
##     0.0     0.5     1.0     0.5     0.5     0.0     0.0     0.0     0.5     0.5 
## prop 31 prop 32 prop 33 prop 34 prop 35 prop 36 prop 37 prop 38 prop 39 prop 40 
##     1.0     0.5     1.0     1.0     0.0     0.5     0.5     0.5     0.5     0.5 
## prop 41 prop 42 prop 43 prop 44 prop 45 prop 46 prop 47 prop 48 prop 49 
##     0.0     1.0     1.0     0.5     0.5     1.0     0.5     1.0     0.5

Alpha Diversity

ggplot(sam, aes(x = Sex, y = Shannon)) + 
    geom_boxplot() + geom_jitter(height = 0, width = 0.25)

anova(lm(Shannon~Sex, data = sam))
## Analysis of Variance Table
## 
## Response: Shannon
##           Df  Sum Sq Mean Sq F value Pr(>F)
## Sex        1  0.1009 0.10091  0.6689 0.4154
## Residuals 96 14.4824 0.15086

Beta Diversity

Bray-Curtis distance

plot_ord(ps.rare.prop, 'Sex','MDS','bray')

plot_ord(ps.rare.prop, 'Sex','NMDS','bray')
## Run 0 stress 0.2090031 
## Run 1 stress 0.2036464 
## ... New best solution
## ... Procrustes: rmse 0.04265362  max resid 0.2778955 
## Run 2 stress 0.2248784 
## Run 3 stress 0.2042766 
## Run 4 stress 0.2048655 
## Run 5 stress 0.2051498 
## Run 6 stress 0.2366194 
## Run 7 stress 0.2052398 
## Run 8 stress 0.2103802 
## Run 9 stress 0.2040754 
## ... Procrustes: rmse 0.01125817  max resid 0.08044039 
## Run 10 stress 0.2046176 
## Run 11 stress 0.2053605 
## Run 12 stress 0.2050587 
## Run 13 stress 0.2258859 
## Run 14 stress 0.2048736 
## Run 15 stress 0.2202869 
## Run 16 stress 0.2042445 
## Run 17 stress 0.2101723 
## Run 18 stress 0.2034738 
## ... New best solution
## ... Procrustes: rmse 0.01413202  max resid 0.08779577 
## Run 19 stress 0.2092567 
## Run 20 stress 0.2236348 
## *** Best solution was not repeated -- monoMDS stopping criteria:
##      8: no. of iterations >= maxit
##     12: stress ratio > sratmax

permanova(ps.rare.prop, 'Household+Sex', 'bray')
## PERMANOVA Model: bray~Household+Sex; Strata: None
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 10000
## 
## adonis2(formula = model, data = df, permutations = permutations, by = "term", parallel = core, na.action = na.omit, strata = strata)
##           Df SumOfSqs      R2      F    Pr(>F)    
## Household 48  12.2360 0.68899 2.2384 9.999e-05 ***
## Sex        1   0.0570 0.00321 0.5005    0.9816    
## Residual  48   5.4663 0.30780                     
## Total     97  17.7594 1.00000                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
permdisp(ps.rare.prop, 'Sex', 'bray')
## PERMDISP Model: bray~Sex
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 10000
## 
## Response: Distances
##           Df  Sum Sq  Mean Sq      F N.Perm Pr(>F)
## Groups     1 0.01785 0.017847 1.2103  10000 0.2796
## Residuals 96 1.41562 0.014746

Weighted-Unifrac distance

plot_ord(ps.rare.prop, 'Sex','MDS','wunifrac')

plot_ord(ps.rare.prop, 'Sex','NMDS','wunifrac')
## Run 0 stress 0.1675024 
## Run 1 stress 0.1724029 
## Run 2 stress 0.1744982 
## Run 3 stress 0.1700751 
## Run 4 stress 0.1709136 
## Run 5 stress 0.1692883 
## Run 6 stress 0.1734891 
## Run 7 stress 0.1801123 
## Run 8 stress 0.1699995 
## Run 9 stress 0.1731237 
## Run 10 stress 0.1767347 
## Run 11 stress 0.1661457 
## ... New best solution
## ... Procrustes: rmse 0.06489292  max resid 0.233865 
## Run 12 stress 0.1678536 
## Run 13 stress 0.1840589 
## Run 14 stress 0.1757289 
## Run 15 stress 0.1722043 
## Run 16 stress 0.1701955 
## Run 17 stress 0.1725747 
## Run 18 stress 0.1765894 
## Run 19 stress 0.1678513 
## Run 20 stress 0.17223 
## *** Best solution was not repeated -- monoMDS stopping criteria:
##      2: no. of iterations >= maxit
##     18: stress ratio > sratmax

permanova(ps.rare.prop, 'Household+Sex', 'wunifrac')
## PERMANOVA Model: wunifrac~Household+Sex; Strata: None
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 10000
## 
## adonis2(formula = model, data = df, permutations = permutations, by = "term", parallel = core, na.action = na.omit, strata = strata)
##           Df SumOfSqs      R2      F    Pr(>F)    
## Household 48  1.90478 0.67668 2.1024 9.999e-05 ***
## Sex        1  0.00413 0.00147 0.2187    0.9964    
## Residual  48  0.90599 0.32186                     
## Total     97  2.81490 1.00000                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
permdisp(ps.rare.prop, 'Sex', 'wunifrac')
## PERMDISP Model: wunifrac~Sex
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 10000
## 
## Response: Distances
##           Df  Sum Sq   Mean Sq      F N.Perm Pr(>F)
## Groups     1 0.00170 0.0016985 0.4261  10000 0.5174
## Residuals 96 0.38268 0.0039862

Age Effect

Alpha Diversity

sam[which(is.na(sam$Age..months.)),'Household'] 
## [1] "9"  "24"
# remove households that have dog with unspecific age
sam.Age <- sam %>% filter(!(Household %in% c('9', '24')))
ggplot(sam.Age,) +
    geom_line(aes(x = as.numeric(Household), y = Age..months., group = Household)) + 
    geom_point(aes(x = as.numeric(Household), y = Age..months., group = Household, colour = Epileptic.or.Control)) +
    xlab('Household') + ylab('Age in month')

anova(lm(Shannon~Household+Age..months., data = sam.Age))
## Analysis of Variance Table
## 
## Response: Shannon
##              Df Sum Sq  Mean Sq F value Pr(>F)
## Household    46 7.5091 0.163240  1.3217 0.1738
## Age..months.  1 0.0014 0.001369  0.0111 0.9166
## Residuals    46 5.6815 0.123511

Beta Diversity

Bray-Curtis distance

ps.age <- ps.rare.prop %>% subset_samples(!(Household %in% c('9', '24')))
permanova(ps.age, 'Age..months.', 'bray', strata = 'Household')
## PERMANOVA Model: bray~Age..months.; Strata: Household
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Blocks:  strata 
## Permutation: free
## Number of permutations: 10000
## 
## adonis2(formula = model, data = df, permutations = permutations, by = "term", parallel = core, na.action = na.omit, strata = strata)
##              Df SumOfSqs      R2      F Pr(>F)
## Age..months.  1   0.2707 0.01579 1.4763 0.3004
## Residual     92  16.8709 0.98421              
## Total        93  17.1416 1.00000

Weighted-Unifrac distance

permanova(ps.age, 'Age..months.', 'wunifrac')
## PERMANOVA Model: wunifrac~Age..months.; Strata: None
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 10000
## 
## adonis2(formula = model, data = df, permutations = permutations, by = "term", parallel = core, na.action = na.omit, strata = strata)
##              Df SumOfSqs      R2      F Pr(>F)
## Age..months.  1   0.0394 0.01449 1.3531 0.1859
## Residual     92   2.6791 0.98551              
## Total        93   2.7185 1.00000
sessioninfo::session_info()
## ─ Session info ───────────────────────────────────────────────────────────────
##  setting  value
##  version  R version 4.4.1 (2024-06-14)
##  os       macOS 15.1.1
##  system   aarch64, darwin20
##  ui       X11
##  language (EN)
##  collate  en_US.UTF-8
##  ctype    en_US.UTF-8
##  tz       America/New_York
##  date     2024-12-27
##  pandoc   3.5 @ /Users/yixuanyang/miniforge3/bin/ (via rmarkdown)
## 
## ─ Packages ───────────────────────────────────────────────────────────────────
##  package          * version  date (UTC) lib source
##  ade4               1.7-22   2023-02-06 [1] CRAN (R 4.4.0)
##  ape                5.8-1    2024-12-16 [1] CRAN (R 4.4.1)
##  Biobase            2.66.0   2024-10-29 [1] Bioconductor 3.20 (R 4.4.1)
##  BiocGenerics       0.52.0   2024-10-29 [1] Bioconductor 3.20 (R 4.4.1)
##  biomformat         1.34.0   2024-10-29 [1] Bioconductor 3.20 (R 4.4.1)
##  Biostrings         2.74.0   2024-10-29 [1] Bioconductor 3.20 (R 4.4.1)
##  bslib              0.8.0    2024-07-29 [1] CRAN (R 4.4.0)
##  cachem             1.1.0    2024-05-16 [1] CRAN (R 4.4.0)
##  cli                3.6.3    2024-06-21 [1] CRAN (R 4.4.0)
##  cluster            2.1.8    2024-12-11 [1] CRAN (R 4.4.1)
##  codetools          0.2-20   2024-03-31 [1] CRAN (R 4.4.1)
##  colorspace         2.1-1    2024-07-26 [1] CRAN (R 4.4.0)
##  crayon             1.5.3    2024-06-20 [1] CRAN (R 4.4.0)
##  data.table         1.16.4   2024-12-06 [1] CRAN (R 4.4.1)
##  digest             0.6.37   2024-08-19 [1] CRAN (R 4.4.1)
##  dplyr            * 1.1.4    2023-11-17 [1] CRAN (R 4.4.0)
##  evaluate           1.0.1    2024-10-10 [1] CRAN (R 4.4.1)
##  farver             2.1.2    2024-05-13 [1] CRAN (R 4.4.0)
##  fastmap            1.2.0    2024-05-15 [1] CRAN (R 4.4.0)
##  forcats          * 1.0.0    2023-01-29 [1] CRAN (R 4.4.0)
##  foreach            1.5.2    2022-02-02 [1] CRAN (R 4.4.0)
##  generics           0.1.3    2022-07-05 [1] CRAN (R 4.4.0)
##  GenomeInfoDb       1.42.0   2024-10-29 [1] Bioconductor 3.20 (R 4.4.1)
##  GenomeInfoDbData   1.2.13   2024-10-05 [1] Bioconductor
##  ggplot2          * 3.5.1    2024-04-23 [1] CRAN (R 4.4.0)
##  glue               1.8.0    2024-09-30 [1] CRAN (R 4.4.1)
##  gtable             0.3.6    2024-10-25 [1] CRAN (R 4.4.1)
##  here             * 1.0.1    2020-12-13 [1] CRAN (R 4.4.0)
##  hms                1.1.3    2023-03-21 [1] CRAN (R 4.4.0)
##  htmltools          0.5.8.1  2024-04-04 [1] CRAN (R 4.4.0)
##  httr               1.4.7    2023-08-15 [1] CRAN (R 4.4.0)
##  igraph             2.1.2    2024-12-07 [1] CRAN (R 4.4.1)
##  IRanges            2.40.0   2024-10-29 [1] Bioconductor 3.20 (R 4.4.1)
##  iterators          1.0.14   2022-02-05 [1] CRAN (R 4.4.0)
##  jquerylib          0.1.4    2021-04-26 [1] CRAN (R 4.4.0)
##  jsonlite           1.8.9    2024-09-20 [1] CRAN (R 4.4.1)
##  knitr              1.49     2024-11-08 [1] CRAN (R 4.4.1)
##  labeling           0.4.3    2023-08-29 [1] CRAN (R 4.4.0)
##  lattice          * 0.22-6   2024-03-20 [1] CRAN (R 4.4.1)
##  lifecycle          1.0.4    2023-11-07 [1] CRAN (R 4.4.0)
##  lubridate        * 1.9.4    2024-12-08 [1] CRAN (R 4.4.1)
##  magrittr           2.0.3    2022-03-30 [1] CRAN (R 4.4.0)
##  MASS               7.3-61   2024-06-13 [1] CRAN (R 4.4.0)
##  Matrix             1.7-1    2024-10-18 [1] CRAN (R 4.4.1)
##  mgcv               1.9-1    2023-12-21 [1] CRAN (R 4.4.1)
##  multtest           2.62.0   2024-10-29 [1] Bioconductor 3.20 (R 4.4.1)
##  munsell            0.5.1    2024-04-01 [1] CRAN (R 4.4.0)
##  nlme               3.1-166  2024-08-14 [1] CRAN (R 4.4.0)
##  permute          * 0.9-7    2022-01-27 [1] CRAN (R 4.4.0)
##  phyloseq         * 1.50.0   2024-10-29 [1] Bioconductor 3.20 (R 4.4.1)
##  pillar             1.10.0   2024-12-17 [1] CRAN (R 4.4.1)
##  pkgconfig          2.0.3    2019-09-22 [1] CRAN (R 4.4.0)
##  plyr               1.8.9    2023-10-02 [1] CRAN (R 4.4.0)
##  purrr            * 1.0.2    2023-08-10 [1] CRAN (R 4.4.0)
##  R6                 2.5.1    2021-08-19 [1] CRAN (R 4.4.0)
##  Rcpp               1.0.13-1 2024-11-02 [1] CRAN (R 4.4.1)
##  readr            * 2.1.5    2024-01-10 [1] CRAN (R 4.4.0)
##  reshape2           1.4.4    2020-04-09 [1] CRAN (R 4.4.0)
##  rhdf5              2.49.0   2024-05-04 [1] Bioconductor 3.20 (R 4.4.0)
##  rhdf5filters       1.17.0   2024-05-04 [1] Bioconductor 3.20 (R 4.4.0)
##  Rhdf5lib           1.27.0   2024-05-04 [1] Bioconductor 3.20 (R 4.4.0)
##  rlang              1.1.4    2024-06-04 [1] CRAN (R 4.4.0)
##  rmarkdown          2.29     2024-11-04 [1] CRAN (R 4.4.1)
##  rprojroot          2.0.4    2023-11-05 [1] CRAN (R 4.4.0)
##  rstudioapi         0.17.1   2024-10-22 [1] CRAN (R 4.4.1)
##  S4Vectors          0.44.0   2024-10-29 [1] Bioconductor 3.20 (R 4.4.1)
##  sass               0.4.9    2024-03-15 [1] CRAN (R 4.4.0)
##  scales             1.3.0    2023-11-28 [1] CRAN (R 4.4.0)
##  sessioninfo        1.2.2    2021-12-06 [1] CRAN (R 4.4.0)
##  stringi            1.8.4    2024-05-06 [1] CRAN (R 4.4.0)
##  stringr          * 1.5.1    2023-11-14 [1] CRAN (R 4.4.0)
##  survival           3.8-3    2024-12-17 [1] CRAN (R 4.4.1)
##  tibble           * 3.2.1    2023-03-20 [1] CRAN (R 4.4.0)
##  tidyr            * 1.3.1    2024-01-24 [1] CRAN (R 4.4.0)
##  tidyselect         1.2.1    2024-03-11 [1] CRAN (R 4.4.0)
##  tidyverse        * 2.0.0    2023-02-22 [1] CRAN (R 4.4.0)
##  timechange         0.3.0    2024-01-18 [1] CRAN (R 4.4.0)
##  tzdb               0.4.0    2023-05-12 [1] CRAN (R 4.4.0)
##  UCSC.utils         1.2.0    2024-10-29 [1] Bioconductor 3.20 (R 4.4.1)
##  vctrs              0.6.5    2023-12-01 [1] CRAN (R 4.4.0)
##  vegan            * 2.6-8    2024-08-28 [1] CRAN (R 4.4.1)
##  withr              3.0.2    2024-10-28 [1] CRAN (R 4.4.1)
##  xfun               0.49     2024-10-31 [1] CRAN (R 4.4.1)
##  XVector            0.46.0   2024-10-29 [1] Bioconductor 3.20 (R 4.4.1)
##  yaml               2.3.10   2024-07-26 [1] CRAN (R 4.4.0)
##  zlibbioc           1.52.0   2024-10-29 [1] Bioconductor 3.20 (R 4.4.1)
## 
##  [1] /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library
## 
## ──────────────────────────────────────────────────────────────────────────────